# coding: utf-8

import os
from pyspark import *
from operator import *
# NOTE: `from ... import` must target a package or a .py module, not a bare folder.
from package.defs_4 import *

if __name__ == '__main__':
    # Demo of Spark accumulators: a driver-side counter that executor tasks
    # bump once per element, and the re-computation pitfall that makes it
    # count twice when the RDD is not cached.
    conf = SparkConf().setAppName("9_rdd_accumulator") \
        .setMaster("local[*]")
    sc = SparkContext(conf=conf)

    # 10 elements split across 2 partitions.
    rdd1 = sc.parallelize([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 2)

    # Accumulator lives on the driver; tasks may only add to it.
    accumulator_count = sc.accumulator(0)

    def func_map(data):
        """Identity map that increments the accumulator for every element."""
        global accumulator_count
        accumulator_count += 1
        print(accumulator_count)
        return data

    result = rdd1.map(func_map)
    # result.persist()  # caching here would prevent the double count shown below
    result1 = result.collect()
    print("全局变量count的值：", accumulator_count)
    print(result1)
    # Without persist/cache, `result` is discarded after the first collect();
    # the next action re-runs the whole lineage, so func_map executes again and
    # the accumulator is incremented a second time per element (ends at 20).
    rdd2 = result.map(lambda x: x).collect()
    print(rdd2)
    print("全局变量count的值：", accumulator_count)

    # Release Spark resources (the original script leaked the SparkContext).
    sc.stop()














